LOB

  • Feedforward model with one hidden layer.

  • Trained just using LOB and not Liquidity measures: Input dimension is 60 x 21 = 1260

  • Mean squared error used as loss function.

  • Optimizer: Adam

  • Training set consists of 202 days (~80%)

  • Validation set consists of 51 days (~20%)

Shown below:

  • Network structure

  • Settings used for training

  • Plots of model performance

Other evaluations:

Liquidity Measures

LOB + Liquidity Measures

Network Structure

In [1]:
import os
import torch
from torchsummary import summary
from feedforward_one_layer import FFNN1
from IPython.display import HTML


def hide_code():
    """Return an HTML snippet with a button that toggles visibility of the notebook's code cells."""
    return HTML('''<script>code_show=true; function code_toggle() {if (code_show){$('div.input').hide();} else {$('div.input').show();}code_show = !code_show} $( document ).ready(code_toggle);</script><form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')


model = FFNN1()
# Find the first checkpoint file in the working directory.
# The original `[*map(lambda x: x.count('ckpt'), ...)].index(True)` only worked
# because True == 1: it raised ValueError when no filename contained 'ckpt'
# exactly once, and silently skipped names containing it more than once.
ckpt_name = next(f for f in os.listdir() if 'ckpt' in f)
ckpt = torch.load('./' + ckpt_name, map_location=torch.device('cpu'))
print(f'Input size: {60*21}')
# torchsummary expects the model in float32; input is 60 minutes x 21 LOB features.
summary(model.float(), input_size=(60, 21))
hide_code()
Input size: 1260
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Linear-1                   [-1, 64]          80,704
       BatchNorm1d-2                   [-1, 64]             128
         LeakyReLU-3                   [-1, 64]               0
           Dropout-4                   [-1, 64]               0
            Linear-5                    [-1, 5]             325
       BatchNorm1d-6                    [-1, 5]              10
         LeakyReLU-7                    [-1, 5]               0
================================================================
Total params: 81,167
Trainable params: 81,167
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.31
Estimated Total Size (MB): 0.32
----------------------------------------------------------------
Out[1]:

 Settings used in training

In [2]:
import pandas as pd
from configs.train_config import cfg

# Collect the configuration entries of interest, section by section, into a
# single labelled table: (section label, config mapping, keys to display).
_sections = [
    ('USED IN TRAINING', cfg, ['STOCK', 'LOB', 'LIQ_VARS']),
    ('TRAIN', cfg.TRAIN, ['SHUFFLE', 'VAL_SHUFFLE', 'INTRADAY_SHUFFLE',
                          'SPLIT_RATIO', 'BATCH_SIZE', 'VAL_BATCH_SIZE']),
    ('MODEL', cfg.MODEL, ['BACKBONE', 'LOSS', 'DROPOUT_RATE', 'LEAKY_SLOPE']),
    ('OPTIMIZER', cfg.OPTIMIZER, ['LR', 'METHOD', 'LR_SCHEDULER', 'LAMBDA']),
]
configuration = pd.concat(
    [pd.DataFrame.from_dict(source, 'index').loc[keys]
     for _, source, keys in _sections],
    keys=[label for label, _, _ in _sections])
configuration.columns = ['CONFIGURATIONS']
# ckpt_name has the form 'ckpt_<epoch>.pth'; slice out the epoch number.
print(f'Epoch loaded: {ckpt_name[5:-4]}')
configuration
Epoch loaded: 6000
Out[2]:
CONFIGURATIONS
USED IN TRAINING STOCK GARAN
LOB True
LIQ_VARS False
TRAIN SHUFFLE False
VAL_SHUFFLE False
INTRADAY_SHUFFLE False
SPLIT_RATIO 0.8
BATCH_SIZE 75
VAL_BATCH_SIZE 1
MODEL BACKBONE FFNN1
LOSS MSE
DROPOUT_RATE 0
LEAKY_SLOPE 0.5
OPTIMIZER LR 0.001
METHOD adam
LR_SCHEDULER ReduceLROnPlateau
LAMBDA 0
In [3]:
import sys
sys.path.append('./../')
import numpy as np

# Keep only the .npy data files (drops .DS_Store and the like).
# NOTE: the original popped items from `filenames` while enumerating the same
# list, which skips the element after every removal; filtering into a fresh
# list is the safe equivalent.
filenames = sorted(f for f in os.listdir(cfg.DATA.DATA_PATH)
                   if f.split('.')[-1] == 'npy')
if cfg.DATA.PORTION is not None:
    filenames = filenames[:int(len(filenames) * cfg.DATA.PORTION)]

# Chronological train/validation split (filenames sort by date); slicing
# replaces the original append-until-break loop, which also left
# `val_datanames` undefined when `filenames` was empty.
n_train = int(len(filenames) * cfg.TRAIN.SPLIT_RATIO)
train_datanames = filenames[:n_train]
val_datanames = filenames[n_train:]


def _load_split(names):
    """Load the X/y arrays of every file in `names` as torch tensors.

    Each .npy file holds a pickled dict with keys 'X' and 'y' — one day of data.
    Returns (list of X tensors, list of y tensors), one entry per day.
    """
    xs, ys = [], []
    for name in names:
        item = np.load(os.path.join(cfg.DATA.DATA_PATH, name),
                       allow_pickle='TRUE').item()
        xs.append(torch.from_numpy(item['X']))
        ys.append(torch.from_numpy(item['y']))
    return xs, ys


X_t, y_t = _load_split(train_datanames)
X_v, y_v = _load_split(val_datanames)

# Evaluation mode: double precision, loaded weights, no autograd.
model.double()
model.load_state_dict(ckpt['model_state'], strict=False)
torch.set_grad_enabled(False)
model.eval()


def _predict(days):
    """Run the model one sample at a time over a list of per-day tensors."""
    preds = []
    for day in days:
        for sample in day:
            # Add a batch dimension of 1 and drop it again from the output.
            preds.append(model(torch.reshape(sample, (1, *sample.shape)))[0])
    return preds


temp = _predict(X_t)
temp_v = _predict(X_v)

# Sanity check: concatenating and reshaping must preserve row order exactly.
for i, k in zip(temp, torch.cat(temp).reshape(-1, 5)):
    if i.tolist() != k.tolist():
        raise Exception
for i, k in zip(temp_v, torch.cat(temp_v).reshape(-1, 5)):
    if i.tolist() != k.tolist():
        raise Exception


y_t_pred = torch.cat(temp).reshape(-1, 5)
y_v_pred = torch.cat(temp_v).reshape(-1, 5)
y_t = torch.cat(y_t)
y_v = torch.cat(y_v)

# Per-variable errors: transpose so we iterate over the 5 predicted columns.
mse_train = [torch.mean((i - j) ** 2).numpy() for i, j in zip(y_t.transpose(1, 0), y_t_pred.transpose(1, 0))]
mse_val = [torch.mean((i - j) ** 2).numpy() for i, j in zip(y_v.transpose(1, 0), y_v_pred.transpose(1, 0))]
mape_train = [torch.mean(torch.abs(1 - j / i)).numpy() * 100 for i, j in zip(y_t.transpose(1, 0), y_t_pred.transpose(1, 0))]
mape_val = [torch.mean(torch.abs(1 - j / i)).numpy() * 100 for i, j in zip(y_v.transpose(1, 0), y_v_pred.transpose(1, 0))]

Evaluation plots

Below we show the converged and the best results of the model, each with five plots — one for each of the five variables we want to predict. They are created by plotting the training and prediction performance of the model throughout the year over each day's trading window, which runs from the 61st trading minute until the last trading minute of the day.

These windows are ordered and stacked, starting from the first trading day's window until the last day's trading window.

On the x-axis we always have the minutes and on the y-axis the corresponding variable of the plot.

In [4]:
from CODES.utils.plotter import plotter


def _axis_args(idx, ylim, ylabel, title, legend_loc):
    """Build the plotter argument list for predicted variable `idx`.

    The original spelled out five near-identical copies of this structure;
    only the column index, y-limits, y-label, title and legend location differ.
    Order matches `attrs`: 4x plot, make_table, set_ylim, set_xlabel,
    set_ylabel, set_title, legend, grid.
    """
    return [
        [[i[idx].detach().numpy() for i in y_t], '-',
         dict(color='blue', linewidth=2, label='training data', alpha=0.5)],
        [[i[idx].detach().numpy() for i in y_t_pred], '-',
         dict(color='green', linewidth=0.5, label='learned by model')],
        # Validation curves continue on the x-axis where training data ends.
        [range(len(y_t), len(y_t) + len(y_v)),
         [i[idx].detach().numpy() for i in y_v], '-',
         dict(color='purple', linewidth=2, label='validation data', alpha=1)],
        [range(len(y_t), len(y_t) + len(y_v_pred)),
         [i[idx].detach().numpy() for i in y_v_pred], '-',
         dict(color='darkorange', linewidth=0.5, label='predicted by model')],
        [dict(cellText=[[mse_train[idx], mape_train[idx]],
                        [mse_val[idx], mape_val[idx]]],
              rowLabels=['Training Error', 'Validation Error'],
              colLabels=['Mean Squared', 'Mean Absolute Percentage (%)'],
              loc='lower right'),
         {'row_scale': 2, 'col_scale': 0.5, 'fontsize': 16}],
        ylim,
        ['Minutes', dict(fontsize=15)],
        [ylabel, dict(fontsize=15)],
        [title, dict(fontsize=20)],
        [dict(ncol=1, shadow=1, columnspacing=1, fontsize=20, loc=legend_loc)],
        [dict(b=True, axis='y', alpha=0.5)],
    ]


# (column index, y-limits, y-label, title, legend location) — one per subplot.
_axis_specs = [
    (0, None, 'Price in TL', 'Mid Price', 'upper left'),
    (1, None, 'Price in TL', 'Bid Price Expectation', 'upper left'),
    (2, None, 'Price in TL', 'Ask Price Expectation', 'upper left'),
    (3, [-0.0002, 0.0004], 'Variance in TL$^{2}$', 'Bid Price Variance', 'best'),
    (4, [-0.0002, 0.0004], 'Variance in TL$^{2}$', 'Ask Price Variance', 'best'),
]
args = [_axis_args(*spec) for spec in _axis_specs]

attrs = ['plot', 'plot', 'plot', 'plot',
         'make_table',
         'set_ylim',
         'set_xlabel', 'set_ylabel', 'set_title',
         'legend', 'grid']

plotter(args, attrs,
        fig_title=f'Converged Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ',
        dpi=300, ncols=1, save_path=os.getcwd())
In [5]:
ckpt_name = os.listdir('./mvp_epochs')[[*map(lambda x: x.count('ckpt'),os.listdir('./mvp_epochs'))].index(True)]
ckpt = torch.load('./mvp_epochs/'+ckpt_name,map_location=torch.device('cpu'))
model.double();model.load_state_dict(ckpt['model_state'], strict=False);model.eval();torch.set_grad_enabled(False)

temp = []
for i in X_t:
    for k in i:
        temp.append(model(torch.reshape(k,(1,*k.shape)))[0])
temp_v = []     
for i in X_v:
    for k in i:
        temp_v.append(model(torch.reshape(k,(1,*k.shape)))[0])

for i,k in zip(temp,torch.cat(temp).reshape(-1,5)):
    if i.tolist() != k.tolist():
        raise Exception
for i,k in zip(temp_v,torch.cat(temp_v).reshape(-1,5)):
    if i.tolist() != k.tolist():
        raise Exception

y_t_pred = torch.cat(temp).reshape(-1,5) ; y_v_pred = torch.cat(temp_v).reshape(-1,5)

mse_train = [torch.mean((i-j)**2).numpy() for i,j in zip(y_t.transpose(1,0),y_t_pred.transpose(1,0))]
mse_val = [torch.mean((i-j)**2).numpy() for i,j in zip(y_v.transpose(1,0),y_v_pred.transpose(1,0))]
mape_train = [torch.mean(torch.abs(1-j/i)).numpy()*100 for i,j in zip(y_t.transpose(1,0),y_t_pred.transpose(1,0))]
mape_val = [torch.mean(torch.abs(1-j/i)).numpy()*100 for i,j in zip(y_v.transpose(1,0),y_v_pred.transpose(1,0))]

args = [ #FIRST AXIS
        [ [[i[0].detach().numpy() for i in y_t],'-',dict(color='blue',linewidth=2,label='training data',alpha=0.5)],[[i[0].detach().numpy() for i in y_t_pred],'-',dict(color='green',linewidth=0.5,label='learned by model')],[range(len(y_t),len(y_t)+len(y_v)),[i[0].detach().numpy() for i in y_v],'-',dict(color='purple',linewidth=2,label='validation data',alpha=1)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[0].detach().numpy() for i in y_v_pred],'-',dict(color='darkorange',linewidth=0.5,label='predicted by model')]  
         ,[dict(cellText=[[mse_train[0],mape_train[0]],[mse_val[0],mape_val[0]]],rowLabels=['Training Error','Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
         ,None
         ,['Minutes',dict(fontsize=15)], ['Price in TL',dict(fontsize=15)], ['Mid Price',dict(fontsize=20)]
         ,[dict(ncol=1,shadow=1,columnspacing=1,fontsize=20,loc='upper left')],[dict(b=True,axis='y',alpha=0.5)]
        ]
        ,#SECOND AXIS
        [ [[i[1].detach().numpy() for i in y_t],'-',dict(color='blue',linewidth=2,label='training data',alpha=0.5)],[[i[1].detach().numpy() for i in y_t_pred],'-',dict(color='green',linewidth=0.5,label='learned by model')],[range(len(y_t),len(y_t)+len(y_v)),[i[1].detach().numpy() for i in y_v],'-',dict(color='purple',linewidth=2,label='validation data',alpha=1)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[1].detach().numpy() for i in y_v_pred],'-',dict(color='darkorange',linewidth=0.5,label='predicted by model')]  
         ,[dict(cellText=[[mse_train[1],mape_train[1]],[mse_val[1],mape_val[1]]],rowLabels=['Training Error','Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
         ,None
         ,['Minutes',dict(fontsize=15)], ['Price in TL',dict(fontsize=15)], ['Bid Price Expectation',dict(fontsize=20)]
         ,[dict(ncol=1,shadow=1,columnspacing=1,fontsize=20,loc='upper left')],[dict(b=True,axis='y',alpha=0.5)]
        ]
        ,#THIRD AXIS
        [ [[i[2].detach().numpy() for i in y_t],'-',dict(color='blue',linewidth=2,label='training data',alpha=0.5)],[[i[2].detach().numpy() for i in y_t_pred],'-',dict(color='green',linewidth=0.5,label='learned by model')],[range(len(y_t),len(y_t)+len(y_v)),[i[2].detach().numpy() for i in y_v],'-',dict(color='purple',linewidth=2,label='validation data',alpha=1)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[2].detach().numpy() for i in y_v_pred],'-',dict(color='darkorange',linewidth=0.5,label='predicted by model')]  
         ,[dict(cellText=[[mse_train[2],mape_train[2]],[mse_val[2],mape_val[2]]],rowLabels=['Training Error','Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
         ,None
         ,['Minutes',dict(fontsize=15)], ['Price in TL',dict(fontsize=15)], ['Ask Price Expectation',dict(fontsize=20)]
         ,[dict(ncol=1,shadow=1,columnspacing=1,fontsize=20,loc='upper left')],[dict(b=True,axis='y',alpha=0.5)]
        ]
        ,#FOURTH AXIS
        [ [[i[3].detach().numpy() for i in y_t],'-',dict(color='blue',linewidth=2,label='training data',alpha=0.5)],[[i[3].detach().numpy() for i in y_t_pred],'-',dict(color='green',linewidth=0.5,label='learned by model')],[range(len(y_t),len(y_t)+len(y_v)),[i[3].detach().numpy() for i in y_v],'-',dict(color='purple',linewidth=2,label='validation data',alpha=1)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[3].detach().numpy() for i in y_v_pred],'-',dict(color='darkorange',linewidth=0.5,label='predicted by model')]  
         ,[dict(cellText=[[mse_train[3],mape_train[3]],[mse_val[3],mape_val[3]]],rowLabels=['Training Error','Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
         ,[-0.0002,0.0004]
         ,['Minutes',dict(fontsize=15)], ['Variance in TL$^{2}$',dict(fontsize=15)], ['Bid Price Variance',dict(fontsize=20)]
         ,[dict(ncol=1,shadow=1,columnspacing=1,fontsize=20,loc='best')],[dict(b=True,axis='y',alpha=0.5)]
        ]
        ,#FIFTH AXIS
        [ [[i[4].detach().numpy() for i in y_t],'-',dict(color='blue',linewidth=2,label='training data',alpha=0.5)],[[i[4].detach().numpy() for i in y_t_pred],'-',dict(color='green',linewidth=0.5,label='learned by model')],[range(len(y_t),len(y_t)+len(y_v)),[i[4].detach().numpy() for i in y_v],'-',dict(color='purple',linewidth=2,label='validation data',alpha=1)],[range(len(y_t),len(y_t)+len(y_v_pred)),[i[4].detach().numpy() for i in y_v_pred],'-',dict(color='darkorange',linewidth=0.5,label='predicted by model')]  
         ,[dict(cellText= [[mse_train[4],mape_train[4]],[mse_val[4],mape_val[4]]],rowLabels=['Training Error','Validation Error'],colLabels=['Mean Squared','Mean Absolute Percentage (%)'],loc='lower right'),{'row_scale':2,'col_scale':0.5,'fontsize':16}]
         ,[-0.0002,0.0004]
         ,['Minutes',dict(fontsize=15)], ['Variance in TL$^{2}$',dict(fontsize=15)], ['Ask Price Variance',dict(fontsize=20)]
         ,[dict(ncol=1,shadow=1,columnspacing=1,fontsize=20,loc='best')],[dict(b=True,axis='y',alpha=0.5)]
        ]
       ]

attrs = ['plot','plot','plot','plot'
         ,'make_table'
         ,'set_ylim'
         ,'set_xlabel','set_ylabel','set_title'
         ,'legend','grid']

plotter(args,attrs,fig_title=f'Best Result of Model\n Stock: GARAN\n Year: 2017\n Epoch:{ckpt_name[5:-4]} ',dpi=300, ncols=1,save_path = os.getcwd())